Commit 71af73bb by Torbjorn Granlund

(mul_cost, div_cost): New variables.

(mul_widen_cost, mul_highpart_cost): New variables.
(init_expmed): Init new variables.
(expand_mult_highpart): New argument, MAX_COST.
Use new argument in several places.  Simplify code that tries
widening multiply variants.
(expand_divmod): New variables MAX_COST, EXTRA_COST.
Initialize MAX_COST dependent on REM_FLAG.
Calculate and pass cost in all calls of expand_mult_highpart.

From-SVN: r8868
parent 6697c6bf
...@@ -61,11 +61,16 @@ static int sdiv_pow2_cheap, smod_pow2_cheap; ...@@ -61,11 +61,16 @@ static int sdiv_pow2_cheap, smod_pow2_cheap;
#define MAX_BITS_PER_WORD BITS_PER_WORD #define MAX_BITS_PER_WORD BITS_PER_WORD
#endif #endif
/* Cost of various pieces of RTL. */ /* Cost of various pieces of RTL. Note that some of these are indexed by shift count,
and some by mode. */
static int add_cost, negate_cost, zero_cost; static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD]; static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD]; static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD]; static int shiftsub_cost[MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];
void void
init_expmed () init_expmed ()
...@@ -77,6 +82,7 @@ init_expmed () ...@@ -77,6 +82,7 @@ init_expmed ()
rtx shift_insn, shiftadd_insn, shiftsub_insn; rtx shift_insn, shiftadd_insn, shiftsub_insn;
int dummy; int dummy;
int m; int m;
enum machine_mode mode, wider_mode;
start_sequence (); start_sequence ();
...@@ -138,6 +144,32 @@ init_expmed () ...@@ -138,6 +144,32 @@ init_expmed ()
= (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET) = (rtx_cost (gen_rtx (MOD, word_mode, reg, GEN_INT (32)), SET)
<= 2 * add_cost); <= 2 * add_cost);
for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
mode != VOIDmode;
mode = GET_MODE_WIDER_MODE (mode))
{
reg = gen_rtx (REG, mode, 10000);
div_cost[(int) mode] = rtx_cost (gen_rtx (UDIV, mode, reg, reg), SET);
mul_cost[(int) mode] = rtx_cost (gen_rtx (MULT, mode, reg, reg), SET);
wider_mode = GET_MODE_WIDER_MODE (mode);
if (wider_mode != VOIDmode)
{
mul_widen_cost[(int) wider_mode]
= rtx_cost (gen_rtx (MULT, wider_mode,
gen_rtx (ZERO_EXTEND, wider_mode, reg),
gen_rtx (ZERO_EXTEND, wider_mode, reg)),
SET);
mul_highpart_cost[(int) mode]
= rtx_cost (gen_rtx (TRUNCATE, mode,
gen_rtx (LSHIFTRT, wider_mode,
gen_rtx (MULT, wider_mode,
gen_rtx (ZERO_EXTEND, wider_mode, reg),
gen_rtx (ZERO_EXTEND, wider_mode, reg)),
GEN_INT (GET_MODE_BITSIZE (mode)))),
SET);
}
}
/* Free the objects we just allocated. */ /* Free the objects we just allocated. */
end_sequence (); end_sequence ();
obfree (free_point); obfree (free_point);
...@@ -2470,14 +2502,17 @@ expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp) ...@@ -2470,14 +2502,17 @@ expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
MODE is the mode of operation and result. MODE is the mode of operation and result.
UNSIGNEDP nonzero means unsigned multiply. */ UNSIGNEDP nonzero means unsigned multiply.
MAX_COST is the total allowed cost for the expanded RTL. */
rtx rtx
expand_mult_highpart (mode, op0, cnst1, target, unsignedp) expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
enum machine_mode mode; enum machine_mode mode;
register rtx op0, target; register rtx op0, target;
unsigned HOST_WIDE_INT cnst1; unsigned HOST_WIDE_INT cnst1;
int unsignedp; int unsignedp;
int max_cost;
{ {
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
optab mul_highpart_optab; optab mul_highpart_optab;
...@@ -2504,7 +2539,8 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp) ...@@ -2504,7 +2539,8 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
/* expand_mult handles constant multiplication of word_mode /* expand_mult handles constant multiplication of word_mode
or narrower. It does a poor job for large modes. */ or narrower. It does a poor job for large modes. */
if (size < BITS_PER_WORD) if (size < BITS_PER_WORD
&& mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
{ {
/* We have to do this, since expand_binop doesn't do conversion for /* We have to do this, since expand_binop doesn't do conversion for
multiply. Maybe change expand_binop to handle widening multiply? */ multiply. Maybe change expand_binop to handle widening multiply? */
...@@ -2521,14 +2557,19 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp) ...@@ -2521,14 +2557,19 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
/* Firstly, try using a multiplication insn that only generates the needed /* Firstly, try using a multiplication insn that only generates the needed
high part of the product, and in the sign flavor of unsignedp. */ high part of the product, and in the sign flavor of unsignedp. */
if (mul_highpart_cost[(int) mode] < max_cost)
{
mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab; mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
target = expand_binop (mode, mul_highpart_optab, target = expand_binop (mode, mul_highpart_optab,
op0, op1, target, unsignedp, OPTAB_DIRECT); op0, op1, target, unsignedp, OPTAB_DIRECT);
if (target) if (target)
return target; return target;
}
/* Secondly, same as above, but use sign flavor opposite of unsignedp. /* Secondly, same as above, but use sign flavor opposite of unsignedp.
Need to adjust the result after the multiplication. */ Need to adjust the result after the multiplication. */
if (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost < max_cost)
{
mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab; mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
target = expand_binop (mode, mul_highpart_optab, target = expand_binop (mode, mul_highpart_optab,
op0, op1, target, unsignedp, OPTAB_DIRECT); op0, op1, target, unsignedp, OPTAB_DIRECT);
...@@ -2536,20 +2577,25 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp) ...@@ -2536,20 +2577,25 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
/* We used the wrong signedness. Adjust the result. */ /* We used the wrong signedness. Adjust the result. */
return expand_mult_highpart_adjust (mode, target, op0, return expand_mult_highpart_adjust (mode, target, op0,
op1, target, unsignedp); op1, target, unsignedp);
}
/* Thirdly, we try to use a widening multiplication, or a wider mode /* Try widening multiplication. */
multiplication. */
moptab = unsignedp ? umul_widen_optab : smul_widen_optab; moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing) if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
; && mul_widen_cost[(int) wider_mode] < max_cost)
else if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing) goto try;
/* Try widening the mode and perform a non-widening multiplication. */
moptab = smul_optab; moptab = smul_optab;
else if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
{ && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
goto try;
/* Try widening multiplication of opposite signedness, and adjust. */ /* Try widening multiplication of opposite signedness, and adjust. */
moptab = unsignedp ? smul_widen_optab : umul_widen_optab; moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing) if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
&& (mul_widen_cost[(int) wider_mode]
+ 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
{ {
tem = expand_binop (wider_mode, moptab, op0, wide_op1, tem = expand_binop (wider_mode, moptab, op0, wide_op1,
NULL_RTX, ! unsignedp, OPTAB_WIDEN); NULL_RTX, ! unsignedp, OPTAB_WIDEN);
...@@ -2565,11 +2611,9 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp) ...@@ -2565,11 +2611,9 @@ expand_mult_highpart (mode, op0, cnst1, target, unsignedp)
} }
} }
/* As a last resort, try widening the mode and perform a return 0;
non-widening multiplication. */
moptab = smul_optab;
}
try:
/* Pass NULL_RTX as target since TARGET has wrong mode. */ /* Pass NULL_RTX as target since TARGET has wrong mode. */
tem = expand_binop (wider_mode, moptab, op0, wide_op1, tem = expand_binop (wider_mode, moptab, op0, wide_op1,
NULL_RTX, unsignedp, OPTAB_WIDEN); NULL_RTX, unsignedp, OPTAB_WIDEN);
...@@ -2617,6 +2661,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) ...@@ -2617,6 +2661,7 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
rtx insn, set; rtx insn, set;
optab optab1, optab2; optab optab1, optab2;
int op1_is_constant, op1_is_pow2; int op1_is_constant, op1_is_pow2;
int max_cost, extra_cost;
op1_is_constant = GET_CODE (op1) == CONST_INT; op1_is_constant = GET_CODE (op1) == CONST_INT;
op1_is_pow2 = (op1_is_constant op1_is_pow2 = (op1_is_constant
...@@ -2721,11 +2766,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) ...@@ -2721,11 +2766,14 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
size = GET_MODE_BITSIZE (compute_mode); size = GET_MODE_BITSIZE (compute_mode);
#if 0 #if 0
/* It should be possible to restrict the precision to GET_MODE_BITSIZE /* It should be possible to restrict the precision to GET_MODE_BITSIZE
(mode), and thereby get better code when OP1 is a constant. Do that for (mode), and thereby get better code when OP1 is a constant. Do that
GCC 2.7. It will require going over all usages of SIZE below. */ later. It will require going over all usages of SIZE below. */
size = GET_MODE_BITSIZE (mode); size = GET_MODE_BITSIZE (mode);
#endif #endif
max_cost = div_cost[(int) compute_mode]
- (rem_flag ? mul_cost[(int) compute_mode] + add_cost : 0);
/* Now convert to the best mode to use. */ /* Now convert to the best mode to use. */
if (compute_mode != mode) if (compute_mode != mode)
{ {
...@@ -2826,8 +2874,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) ...@@ -2826,8 +2874,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
{ {
rtx t1, t2, t3, t4; rtx t1, t2, t3, t4;
extra_cost = (shift_cost[post_shift - 1]
+ shift_cost[1] + 2 * add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml, t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 1); NULL_RTX, 1,
max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
goto fail1; goto fail1;
t2 = force_operand (gen_rtx (MINUS, compute_mode, t2 = force_operand (gen_rtx (MINUS, compute_mode,
...@@ -2850,8 +2901,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) ...@@ -2850,8 +2901,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0, t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
build_int_2 (pre_shift, 0), build_int_2 (pre_shift, 0),
NULL_RTX, 1); NULL_RTX, 1);
extra_cost = (shift_cost[pre_shift]
+ shift_cost[post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml, t2 = expand_mult_highpart (compute_mode, t1, ml,
NULL_RTX, 1); NULL_RTX, 1,
max_cost - extra_cost);
if (t2 == 0) if (t2 == 0)
goto fail1; goto fail1;
quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2, quotient = expand_shift (RSHIFT_EXPR, compute_mode, t2,
...@@ -2952,8 +3006,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) ...@@ -2952,8 +3006,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
{ {
rtx t1, t2, t3; rtx t1, t2, t3;
extra_cost = (shift_cost[post_shift]
+ shift_cost[size - 1] + add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml, t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0); NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
goto fail1; goto fail1;
t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1, t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
...@@ -2972,8 +3029,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) ...@@ -2972,8 +3029,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
rtx t1, t2, t3, t4; rtx t1, t2, t3, t4;
ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1); ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
extra_cost = (shift_cost[post_shift]
+ shift_cost[size - 1] + 2 * add_cost);
t1 = expand_mult_highpart (compute_mode, op0, ml, t1 = expand_mult_highpart (compute_mode, op0, ml,
NULL_RTX, 0); NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
goto fail1; goto fail1;
t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0), t2 = force_operand (gen_rtx (PLUS, compute_mode, t1, op0),
...@@ -3047,8 +3107,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp) ...@@ -3047,8 +3107,11 @@ expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
build_int_2 (size - 1, 0), NULL_RTX, 0); build_int_2 (size - 1, 0), NULL_RTX, 0);
t2 = expand_binop (compute_mode, xor_optab, op0, t1, t2 = expand_binop (compute_mode, xor_optab, op0, t1,
NULL_RTX, 0, OPTAB_WIDEN); NULL_RTX, 0, OPTAB_WIDEN);
extra_cost = (shift_cost[post_shift]
+ shift_cost[size - 1] + 2 * add_cost);
t3 = expand_mult_highpart (compute_mode, t2, ml, t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1); NULL_RTX, 1,
max_cost - extra_cost);
if (t3 != 0) if (t3 != 0)
{ {
t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3, t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment