Commit 8efc8980 authored by Richard Sandiford, committed by Richard Sandiford

expmed.c (choose_mult_variant, [...]): New, split from...

	* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
	(expand_mult): ...here.
	(extract_high_half): New, split out from expand_mult_highpart.
	(expand_mult_highpart_optab): Likewise.  Don't clobber target prematurely.
	(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
	then see if any of the specialized optabs are cheaper.

From-SVN: r79673
parent d36d5600
2004-03-19 Richard Sandiford <rsandifo@redhat.com>
* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
(expand_mult): ...here.
(extract_high_half): New, split out from expand_mult_highpart.
(expand_mult_highpart_optab): Likewise. Don't clobber target prematurely.
(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
then see if any of the specialized optabs are cheaper.
2004-03-18 Ian Lance Taylor <ian@wasabisystems.com>
* mklibgcc.in: Remove obsolete MAYBE_USE_COLLECT2.
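To make the fixup variants concrete, here is a small plain-C illustration (not part of the commit; the function names are made up): multiplying by 5 can reuse the shift/add sequence for 4 plus one addition (add_variant), multiplying by -5 can reuse the sequence for 5 followed by a negation (negate_variant), and 10 can be synthesized directly (basic_variant).

#include <stdio.h>

/* Illustrative only, not GCC code: the three mult_variant fixups,
   written as plain C shift/add sequences.  */

static int mul5 (int x)        /* add_variant: x*5 = x*4 + x */
{
  return (x << 2) + x;
}

static int mul_minus5 (int x)  /* negate_variant: x*-5 = -(x*5) */
{
  return -((x << 2) + x);
}

static int mul10 (int x)       /* basic_variant: x*10 = (x*4 + x) << 1 */
{
  return ((x << 2) + x) << 1;
}

int main (void)
{
  printf ("%d %d %d\n", mul5 (7), mul_minus5 (7), mul10 (7)); /* 35 -35 70 */
  return 0;
}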
@@ -2149,11 +2149,24 @@ struct algorithm
   char log[MAX_BITS_PER_WORD];
 };

+/* Indicates the type of fixup needed after a constant multiplication.
+   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+   the result should be negated, and ADD_VARIANT means that the
+   multiplicand should be added to the result.  */
+enum mult_variant {basic_variant, negate_variant, add_variant};
+
 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+                                 struct algorithm *, enum mult_variant *);
+static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+                              const struct algorithm *, enum mult_variant);
 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
                                                  int, unsigned HOST_WIDE_INT *,
                                                  int *, int *);
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+static rtx extract_high_half (enum machine_mode, rtx);
+static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+                                       int, int);

 /* Compute and return the best algorithm for multiplying by T.
    The algorithm must cost less than cost_limit.
    If retval.cost >= COST_LIMIT, no algorithm was found and all
@@ -2396,91 +2409,68 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
alg_out->ops * sizeof *alg_out->log);
}
-/* Perform a multiplication and return an rtx for the result.
-   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
-   TARGET is a suggestion for where to store the result (an rtx).
-
-   We check specially for a constant integer as OP1.
-   If you want this check for OP0 as well, then before calling
-   you should swap the two operands if OP0 would be constant.  */
-
-rtx
-expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
-             int unsignedp)
-{
-  rtx const_op1 = op1;
-
-  /* synth_mult does an `unsigned int' multiply.  As long as the mode is
-     less than or equal in size to `unsigned int' this doesn't matter.
-     If the mode is larger than `unsigned int', then synth_mult works only
-     if the constant value exactly fits in an `unsigned int' without any
-     truncation.  This means that multiplying by negative values does
-     not work; results are off by 2^32 on a 32 bit machine.  */
-
-  /* If we are multiplying in DImode, it may still be a win
-     to try to work with shifts and adds.  */
-  if (GET_CODE (op1) == CONST_DOUBLE
-      && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
-      && HOST_BITS_PER_INT >= BITS_PER_WORD
-      && CONST_DOUBLE_HIGH (op1) == 0)
-    const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
-  else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
-           && GET_CODE (op1) == CONST_INT
-           && INTVAL (op1) < 0)
-    const_op1 = 0;
-
-  /* We used to test optimize here, on the grounds that it's better to
-     produce a smaller program when -O is not used.
-     But this causes such a terrible slowdown sometimes
-     that it seems better to use synth_mult always.  */
-
-  if (const_op1 && GET_CODE (const_op1) == CONST_INT
-      && (unsignedp || ! flag_trapv))
-    {
-      struct algorithm alg;
-      struct algorithm alg2;
-      HOST_WIDE_INT val = INTVAL (op1);
-      HOST_WIDE_INT val_so_far;
-      rtx insn;
+/* Find the cheapest way of multiplying a value of mode MODE by VAL.
+   Try three variations:
+
+       - a shift/add sequence based on VAL itself
+       - a shift/add sequence based on -VAL, followed by a negation
+       - a shift/add sequence based on VAL - 1, followed by an addition.
+
+   Return true if the cheapest of these is better than register
+   multiplication, describing the algorithm in *ALG and final
+   fixup in *VARIANT.  */
+
+static bool
+choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+                     struct algorithm *alg, enum mult_variant *variant)
+{
   int mult_cost;
-  enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
-
-  /* op0 must be register to make mult_cost match the precomputed
-     shiftadd_cost array.  */
-  op0 = force_reg (mode, op0);
-
-  /* Try to do the computation three ways: multiply by the negative of OP1
-     and then negate, do the multiplication directly, or do multiplication
-     by OP1 - 1.  */
+  struct algorithm alg2;
+  rtx reg;

-  mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
+  reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+  mult_cost = rtx_cost (gen_rtx_MULT (mode, reg, GEN_INT (val)), SET);
   mult_cost = MIN (12 * add_cost, mult_cost);

-  synth_mult (&alg, val, mult_cost);
+  *variant = basic_variant;
+  synth_mult (alg, val, mult_cost);

   /* This works only if the inverted value actually fits in an
      `unsigned int' */
   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
     {
-      synth_mult (&alg2, - val,
-                  (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
-      if (alg2.cost + negate_cost < alg.cost)
-        alg = alg2, variant = negate_variant;
+      synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+      alg2.cost += negate_cost;
+      if (alg2.cost < alg->cost)
+        *alg = alg2, *variant = negate_variant;
     }

   /* This proves very useful for division-by-constant.  */
-  synth_mult (&alg2, val - 1,
-              (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
-  if (alg2.cost + add_cost < alg.cost)
-    alg = alg2, variant = add_variant;
+  synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+  alg2.cost += add_cost;
+  if (alg2.cost < alg->cost)
+    *alg = alg2, *variant = add_variant;

-  if (alg.cost < mult_cost)
-    {
-      /* We found something cheaper than a multiply insn.  */
+  return alg->cost < mult_cost;
+}
+
+/* A subroutine of expand_mult, used for constant multiplications.
+   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+   convenient.  Use the shift/add sequence described by ALG and apply
+   the final fixup specified by VARIANT.  */
+
+static rtx
+expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+                   rtx target, const struct algorithm *alg,
+                   enum mult_variant variant)
+{
+  HOST_WIDE_INT val_so_far;
+  rtx insn, accum, tem;
   int opno;
-  rtx accum, tem;
   enum machine_mode nmode;
/* op0 must be register to make mult_cost match the precomputed
shiftadd_cost array. */
op0 = protect_from_queue (op0, 0);
/* Avoid referencing memory over and over.
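A compilable sketch of the decision choose_mult_variant makes, under a made-up cost model in which every add costs one unit and shifts are free; all names here are hypothetical, not GCC's:

/* Toy cost of a shift/add sequence: roughly one add per set bit.  */
static int toy_synth_cost (long val)
{
  unsigned long v = val < 0 ? -(unsigned long) val : (unsigned long) val;
  int cost = 0;
  for (; v != 0; v >>= 1)
    cost += (int) (v & 1);
  return cost;
}

enum toy_variant { TOY_BASIC, TOY_NEGATE, TOY_ADD };

/* Returns nonzero when some shift/add variant beats a multiply insn.  */
static int
toy_choose_variant (long val, int mult_cost, int negate_cost, int add_cost,
                    enum toy_variant *variant)
{
  int best = toy_synth_cost (val);
  *variant = TOY_BASIC;
  if (toy_synth_cost (-val) + negate_cost < best)
    {
      best = toy_synth_cost (-val) + negate_cost;
      *variant = TOY_NEGATE;
    }
  if (toy_synth_cost (val - 1) + add_cost < best)
    {
      best = toy_synth_cost (val - 1) + add_cost;
      *variant = TOY_ADD;
    }
  return best < mult_cost;
}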
@@ -2491,12 +2481,12 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,

   /* ACCUM starts out either as OP0 or as a zero, depending on
      the first operation.  */

-  if (alg.op[0] == alg_zero)
+  if (alg->op[0] == alg_zero)
     {
       accum = copy_to_mode_reg (mode, const0_rtx);
       val_so_far = 0;
     }
-  else if (alg.op[0] == alg_m)
+  else if (alg->op[0] == alg_m)
     {
       accum = copy_to_mode_reg (mode, op0);
       val_so_far = 1;
@@ -2504,18 +2494,18 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
   else
     abort ();

-  for (opno = 1; opno < alg.ops; opno++)
+  for (opno = 1; opno < alg->ops; opno++)
     {
-      int log = alg.log[opno];
+      int log = alg->log[opno];
       int preserve = preserve_subexpressions_p ();
       rtx shift_subtarget = preserve ? 0 : accum;
       rtx add_target
-        = (opno == alg.ops - 1 && target != 0 && variant != add_variant
+        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && ! preserve)
          ? target : 0;
       rtx accum_target = preserve ? 0 : accum;

-      switch (alg.op[opno])
+      switch (alg->op[opno])
        {
        case alg_shift:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
@@ -2527,8 +2517,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
                              build_int_2 (log, 0), NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far += (HOST_WIDE_INT) 1 << log;
          break;
@@ -2536,8 +2525,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
                              build_int_2 (log, 0), NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far -= (HOST_WIDE_INT) 1 << log;
          break;
@@ -2546,18 +2534,15 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
                                build_int_2 (log, 0), shift_subtarget,
                                0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
-                               build_int_2 (log, 0), shift_subtarget,
-                               0);
+                               build_int_2 (log, 0), shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;
@@ -2565,8 +2550,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
                              build_int_2 (log, 0), NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;
@@ -2596,15 +2580,13 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
     }

   insn = get_last_insn ();
-  set_unique_reg_note (insn,
-                       REG_EQUAL,
-                       gen_rtx_MULT (nmode, tem,
-                                     GEN_INT (val_so_far)));
+  set_unique_reg_note (insn, REG_EQUAL,
+                       gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
 }

 if (variant == negate_variant)
   {
-    val_so_far = - val_so_far;
+    val_so_far = -val_so_far;
     accum = expand_unop (mode, neg_optab, accum, target, 0);
   }
else if (variant == add_variant)
@@ -2617,8 +2599,53 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
     abort ();

   return accum;
-        }
-    }
+}
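As a worked example of the kind of sequence expand_mult_const replays (plain C rather than RTL, illustration only), here is one way synth_mult can decompose VAL = 10, with the val_so_far bookkeeping shown in comments:

/* Sketch only: one sequence synth_mult can find for VAL = 10.  */
static int mul10_sketch (int x)
{
  int accum = x;             /* alg_m:        val_so_far = 1           */
  accum = (accum << 2) + x;  /* alg_add_t2_m: val_so_far = 1*4 + 1 = 5 */
  accum <<= 1;               /* alg_shift:    val_so_far = 5*2 = 10    */
  return accum;
}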
/* Perform a multiplication and return an rtx for the result.
MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
TARGET is a suggestion for where to store the result (an rtx).
We check specially for a constant integer as OP1.
If you want this check for OP0 as well, then before calling
you should swap the two operands if OP0 would be constant. */
rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
int unsignedp)
{
rtx const_op1 = op1;
enum mult_variant variant;
struct algorithm algorithm;
/* synth_mult does an `unsigned int' multiply. As long as the mode is
less than or equal in size to `unsigned int' this doesn't matter.
If the mode is larger than `unsigned int', then synth_mult works only
if the constant value exactly fits in an `unsigned int' without any
truncation. This means that multiplying by negative values does
not work; results are off by 2^32 on a 32 bit machine. */
/* If we are multiplying in DImode, it may still be a win
to try to work with shifts and adds. */
if (GET_CODE (op1) == CONST_DOUBLE
&& GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
&& HOST_BITS_PER_INT >= BITS_PER_WORD
&& CONST_DOUBLE_HIGH (op1) == 0)
const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
&& GET_CODE (op1) == CONST_INT
&& INTVAL (op1) < 0)
const_op1 = 0;
/* We used to test optimize here, on the grounds that it's better to
produce a smaller program when -O is not used.
But this causes such a terrible slowdown sometimes
that it seems better to use synth_mult always. */
if (const_op1 && GET_CODE (const_op1) == CONST_INT
&& (unsignedp || !flag_trapv)
&& choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant))
return expand_mult_const (mode, op0, INTVAL (const_op1), target,
&algorithm, variant);
if (GET_CODE (op0) == CONST_DOUBLE)
{
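The `unsigned int' caveat in the comment above can be reproduced in plain C (illustration only, assuming 32-bit int and 64-bit long long): a negative multiplier truncated to 32 bits yields a product that is off by a multiple of 2^32 in the wider mode.

#include <stdio.h>

int main (void)
{
  long long x = 3;
  unsigned int trunc = (unsigned int) -5;   /* 4294967291, not -5 */
  long long wrong = x * (long long) trunc;  /* 12884901873 */
  long long right = x * -5LL;               /* -15 */
  /* The difference is exactly 3 * 2^32.  */
  printf ("%lld %lld %lld\n", wrong, right, wrong - right);
  return 0;
}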
@@ -2832,70 +2859,46 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
return target;
}
-/* Emit code to multiply OP0 and CNST1, putting the high half of the result
-   in TARGET if that is convenient, and return where the result is.  If the
-   operation can not be performed, 0 is returned.
-
-   MODE is the mode of operation and result.
-
-   UNSIGNEDP nonzero means unsigned multiply.
-
-   MAX_COST is the total allowed cost for the expanded RTL.  */
-
-rtx
-expand_mult_highpart (enum machine_mode mode, rtx op0,
-                      unsigned HOST_WIDE_INT cnst1, rtx target,
-                      int unsignedp, int max_cost)
+/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
+
+static rtx
+extract_high_half (enum machine_mode mode, rtx op)
+{
+  enum machine_mode wider_mode;
+
+  if (mode == word_mode)
+    return gen_highpart (mode, op);
+
+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
+                     build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
+  return convert_modes (mode, wider_mode, op, 0);
+}
+
+/* Like expand_mult_highpart, but only consider using a multiplication
+   optab.  OP1 is an rtx for the constant operand.  */
+
+static rtx
+expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+                            rtx target, int unsignedp, int max_cost)
 {
-  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
-  optab mul_highpart_optab;
+  enum machine_mode wider_mode;
   optab moptab;
   rtx tem;
-  int size = GET_MODE_BITSIZE (mode);
-  rtx op1, wide_op1;
-
-  /* We can't support modes wider than HOST_BITS_PER_INT.  */
-  if (size > HOST_BITS_PER_WIDE_INT)
-    abort ();
-
-  op1 = gen_int_mode (cnst1, mode);
-
-  wide_op1
-    = immed_double_const (cnst1,
-                          (unsignedp
-                           ? (HOST_WIDE_INT) 0
-                           : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
-                          wider_mode);
-
-  /* expand_mult handles constant multiplication of word_mode
-     or narrower.  It does a poor job for large modes.  */
-  if (size < BITS_PER_WORD
-      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
-    {
-      /* We have to do this, since expand_binop doesn't do conversion for
-         multiply.  Maybe change expand_binop to handle widening multiply?  */
-      op0 = convert_to_mode (wider_mode, op0, unsignedp);
-
-      /* We know that this can't have signed overflow, so pretend this is
-         an unsigned multiply.  */
-      tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-                          build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
-    }
+  int size;

   if (target == 0)
     target = gen_reg_rtx (mode);

+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  size = GET_MODE_BITSIZE (mode);
+
   /* Firstly, try using a multiplication insn that only generates the needed
      high part of the product, and in the sign flavor of unsignedp.  */
   if (mul_highpart_cost[(int) mode] < max_cost)
     {
-      mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-                             op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
-        return target;
+      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+                          unsignedp, OPTAB_DIRECT);
+      if (tem)
+        return tem;
     }
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
@@ -2904,13 +2907,13 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
           < max_cost))
     {
-      mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-                             op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
+      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+                          unsignedp, OPTAB_DIRECT);
+      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
-        return expand_mult_highpart_adjust (mode, target, op0,
-                                            op1, target, unsignedp);
+        return expand_mult_highpart_adjust (mode, tem, op0, op1,
+                                            tem, unsignedp);
     }
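The "Don't clobber target prematurely" part of the change is visible above: each attempt is now written to tem rather than target, so a failed attempt no longer destroys the caller's suggested target. A toy C illustration of the hazard, with hypothetical names (not GCC code):

/* A failed attempt must not overwrite the caller's suggested target,
   because later strategies still want to reuse it.  */
static int try_insn (int works, int target) { return works ? target : 0; }

static int expand_sketch (int target)
{
  int tem = try_insn (0, target);  /* first attempt fails, returns 0 */
  if (tem != 0)
    return tem;
  /* With the old style, `target' itself would now be 0 and the
     suggestion would be lost for the attempt below.  */
  return try_insn (1, target);
}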
/* Try widening multiplication. */
@@ -2918,8 +2921,10 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
       && mul_widen_cost[(int) wider_mode] < max_cost)
     {
-      op1 = force_reg (mode, op1);
-      goto try;
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+                          unsignedp, OPTAB_WIDEN);
+      if (tem)
+        return extract_high_half (mode, tem);
     }
/* Try widening the mode and perform a non-widening multiplication. */
@@ -2928,8 +2933,10 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
       && size - 1 < BITS_PER_WORD
       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
     {
-      op1 = wide_op1;
-      goto try;
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+                          unsignedp, OPTAB_WIDEN);
+      if (tem)
+        return extract_high_half (mode, tem);
     }
/* Try widening multiplication of opposite signedness, and adjust. */
@@ -2944,10 +2951,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
                         NULL_RTX, ! unsignedp, OPTAB_WIDEN);
       if (tem != 0)
        {
-         /* Extract the high half of the just generated product.  */
-         tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-                             build_int_2 (size, 0), NULL_RTX, 1);
-         tem = convert_modes (mode, wider_mode, tem, unsignedp);
+         tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, op1,
                                              target, unsignedp);
@@ -2955,25 +2959,52 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
}
return 0;
}
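What extract_high_half computes can be sketched in plain C for a 32-bit mode on a host with 64-bit arithmetic (illustration only, not GCC code): widen, multiply, shift right by the mode's bit size, truncate back.

#include <stdint.h>

static uint32_t high_half_sketch (uint32_t a, uint32_t b)
{
  uint64_t wide = (uint64_t) a * b;   /* widening multiply */
  return (uint32_t) (wide >> 32);     /* the MODE high part */
}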
-try:
-  /* Pass NULL_RTX as target since TARGET has wrong mode.  */
-  tem = expand_binop (wider_mode, moptab, op0, op1,
-                      NULL_RTX, unsignedp, OPTAB_WIDEN);
-  if (tem == 0)
-    return 0;
-
-  /* Extract the high half of the just generated product.  */
-  if (mode == word_mode)
-    {
-      return gen_highpart (mode, tem);
-    }
-  else
-    {
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-                          build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
-    }
-}
+
+/* Emit code to multiply OP0 and CNST1, putting the high half of the result
+   in TARGET if that is convenient, and return where the result is.  If the
+   operation can not be performed, 0 is returned.
+
+   MODE is the mode of operation and result.
+
+   UNSIGNEDP nonzero means unsigned multiply.
+
+   MAX_COST is the total allowed cost for the expanded RTL.  */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0,
+                      unsigned HOST_WIDE_INT cnst1, rtx target,
+                      int unsignedp, int max_cost)
+{
+  enum machine_mode wider_mode;
+  enum mult_variant variant;
+  struct algorithm alg;
+  rtx op1, tem;
+
+  /* We can't support modes wider than HOST_BITS_PER_INT.  */
+  if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
+    abort ();
+
+  op1 = gen_int_mode (cnst1, mode);
+
+  /* See whether shift/add multiplication is cheap enough.  */
+  if (choose_mult_variant (mode, cnst1, &alg, &variant)
+      && (alg.cost += shift_cost[GET_MODE_BITSIZE (mode) - 1]) < max_cost)
+    {
+      /* See whether the specialized multiplication optabs are
+         cheaper than the shift/add version.  */
+      tem = expand_mult_highpart_optab (mode, op0, op1, target,
+                                        unsignedp, alg.cost);
+      if (tem)
+        return tem;
+
+      wider_mode = GET_MODE_WIDER_MODE (mode);
+      op0 = convert_to_mode (wider_mode, op0, unsignedp);
+      tem = expand_mult_const (wider_mode, op0, cnst1, 0, &alg, variant);
+      return extract_high_half (mode, tem);
+    }
+  return expand_mult_highpart_optab (mode, op0, op1, target,
+                                     unsignedp, max_cost);
+}
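The new structure of expand_mult_highpart amounts to the following cost discipline (toy sketch with made-up names and costs, not GCC code): price the shift/add sequence first, then require the optab route to beat that price rather than the caller's full budget.

#include <limits.h>

/* Pretend the specialized highpart insn costs 5; it is usable only
   when it fits under the given budget.  */
static int optab_route_cost (int budget)
{
  int cost = 5;
  return cost < budget ? cost : INT_MAX;
}

static int highpart_plan_cost (int shift_add_cost, int max_cost)
{
  if (shift_add_cost < max_cost)
    {
      int c = optab_route_cost (shift_add_cost); /* must beat shift/add */
      return c != INT_MAX ? c : shift_add_cost;  /* else use shift/add  */
    }
  return optab_route_cost (max_cost);            /* optab route only    */
}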
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
......