Commit 7963ac37 by Richard Kenner

(shift_cost): Now a vector.

(shiftadd_cost): New vector for cost of (N * a + b) instructions.
(shiftsub_cost): New vector for cost of (N * a - b) instructions.
(lea_cost): Removed.
(init_expmed): Initialize new vectors.  Use ASHIFT, not LSHIFT.
Remove code initializing lea_cost.
(enum alg_code): New definition.
(synth_mult): Rewrite for better algorithms and faster operation.
(expand_mult): Rewrite code for constant multiplication.

From-SVN: r3735
parent f0052887
...@@ -50,7 +50,10 @@ int mult_is_very_cheap; ...@@ -50,7 +50,10 @@ int mult_is_very_cheap;
static int sdiv_pow2_cheap, smod_pow2_cheap; static int sdiv_pow2_cheap, smod_pow2_cheap;
/* Cost of various pieces of RTL. */ /* Cost of various pieces of RTL. */
static int add_cost, shift_cost, mult_cost, negate_cost, lea_cost; static int add_cost, mult_cost, negate_cost;
static int shift_cost[BITS_PER_WORD];
static int shiftadd_cost[BITS_PER_WORD];
static int shiftsub_cost[BITS_PER_WORD];
/* Max scale factor for scaled address in lea instruction. */ /* Max scale factor for scaled address in lea instruction. */
static int lea_max_mul; static int lea_max_mul;
...@@ -63,44 +66,47 @@ init_expmed () ...@@ -63,44 +66,47 @@ init_expmed ()
to see what insns exist. */ to see what insns exist. */
rtx reg = gen_rtx (REG, word_mode, FIRST_PSEUDO_REGISTER); rtx reg = gen_rtx (REG, word_mode, FIRST_PSEUDO_REGISTER);
rtx pow2 = GEN_INT (32); rtx pow2 = GEN_INT (32);
rtx lea; rtx shift_tmp, shiftadd_tmp, shiftsub_tmp;
HOST_WIDE_INT i; HOST_WIDE_INT i;
int dummy; int dummy;
int m;
add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET); add_cost = rtx_cost (gen_rtx (PLUS, word_mode, reg, reg), SET);
shift_cost = rtx_cost (gen_rtx (LSHIFT, word_mode, reg,
/* Using a constant gives better shift_tmp = gen_rtx (ASHIFT, word_mode, reg, const0_rtx);
estimate of typical costs. shiftadd_tmp = gen_rtx (PLUS, word_mode, gen_rtx (MULT, word_mode, reg, const0_rtx), reg);
1 or 2 might have quirks. */ shiftsub_tmp = gen_rtx (MINUS, word_mode, gen_rtx (MULT, word_mode, reg, const0_rtx), reg);
GEN_INT (3)), SET);
init_recog ();
/* Using 0 as second argument of recog in the loop is not quite right,
but what else is there to do? */
for (m = 1; m < BITS_PER_WORD; m++)
{
shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;
XEXP (shift_tmp, 1) = GEN_INT (m);
if (recog (gen_rtx (SET, VOIDmode, reg, shift_tmp), 0, &dummy) >= 0)
shift_cost[m] = rtx_cost (shift_tmp, SET);
XEXP (XEXP (shiftadd_tmp, 0), 1) = GEN_INT ((HOST_WIDE_INT) 1 << m);
if (recog (gen_rtx (SET, VOIDmode, reg, shiftadd_tmp), 0, &dummy) >= 0)
shiftadd_cost[m] = rtx_cost (shiftadd_tmp, SET);
XEXP (XEXP (shiftsub_tmp, 0), 1) = GEN_INT ((HOST_WIDE_INT) 1 << m);
if (recog (gen_rtx (SET, VOIDmode, reg, shiftsub_tmp), 0, &dummy) >= 0)
shiftsub_cost[m] = rtx_cost (shiftsub_tmp, SET);
}
mult_cost = rtx_cost (gen_rtx (MULT, word_mode, reg, reg), SET); mult_cost = rtx_cost (gen_rtx (MULT, word_mode, reg, reg), SET);
negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET); negate_cost = rtx_cost (gen_rtx (NEG, word_mode, reg), SET);
/* 999999 is chosen to avoid any plausible faster special case. */ /* 999999 is chosen to avoid any plausible faster special case. */
mult_is_very_cheap mult_is_very_cheap
= (rtx_cost (gen_rtx (MULT, word_mode, reg, GEN_INT (999999)), SET) = (rtx_cost (gen_rtx (MULT, word_mode, reg, GEN_INT (999999)), SET)
< rtx_cost (gen_rtx (LSHIFT, word_mode, reg, GEN_INT (7)), SET)); < rtx_cost (gen_rtx (ASHIFT, word_mode, reg, GEN_INT (7)), SET));
sdiv_pow2_cheap sdiv_pow2_cheap
= rtx_cost (gen_rtx (DIV, word_mode, reg, pow2), SET) <= 2 * add_cost; = rtx_cost (gen_rtx (DIV, word_mode, reg, pow2), SET) <= 2 * add_cost;
smod_pow2_cheap smod_pow2_cheap
= rtx_cost (gen_rtx (MOD, word_mode, reg, pow2), SET) <= 2 * add_cost; = rtx_cost (gen_rtx (MOD, word_mode, reg, pow2), SET) <= 2 * add_cost;
init_recog ();
for (i = 2;; i <<= 1)
{
lea = gen_rtx (SET, VOIDmode, reg,
gen_rtx (PLUS, word_mode,
gen_rtx (MULT, word_mode, reg, GEN_INT (i)),
reg));
/* Using 0 as second argument is not quite right,
but what else is there to do? */
if (recog (lea, 0, &dummy) < 0)
break;
lea_max_mul = i;
lea_cost = rtx_cost (SET_SRC (lea), SET);
}
/* Free the objects we just allocated. */ /* Free the objects we just allocated. */
obfree (free_point); obfree (free_point);
} }
...@@ -1716,7 +1722,10 @@ expand_shift (code, mode, shifted, amount, target, unsignedp) ...@@ -1716,7 +1722,10 @@ expand_shift (code, mode, shifted, amount, target, unsignedp)
return temp; return temp;
} }
enum alg_code { alg_add, alg_subtract, alg_compound }; enum alg_code { alg_none, alg_add_t_m2, alg_sub_t_m2,
alg_add_factor, alg_sub_factor,
alg_add_t2_m, alg_sub_t2_m,
alg_add, alg_subtract, alg_factor, alg_shiftop };
/* This structure records a sequence of operations. /* This structure records a sequence of operations.
`ops' is the number of operations recorded. `ops' is the number of operations recorded.
...@@ -1724,12 +1733,13 @@ enum alg_code { alg_add, alg_subtract, alg_compound }; ...@@ -1724,12 +1733,13 @@ enum alg_code { alg_add, alg_subtract, alg_compound };
The operations are stored in `op' and the corresponding The operations are stored in `op' and the corresponding
integer coefficients in `coeff'. integer coefficients in `coeff'.
These are the operations: These are the operations:
alg_add Add to the total the multiplicand times the coefficient. alg_add_t_m2 total := total + multiplicand * coeff;
alg_subtract Subtract the multiplicand times the coefficient. alg_sub_t_m2 total := total - multiplicand * coeff;
alg_compound This coefficient plus or minus the following one alg_add_factor total := total * coeff + total;
is multiplied into the total. The following operation alg_sub_factor total := total * coeff - total;
is alg_add or alg_subtract to indicate whether to add alg_add_t2_m total := total * coeff + multiplicand;
or subtract the two coefficients. */ alg_sub_t2_m total := total * coeff - multiplicand;
*/
#ifndef MAX_BITS_PER_WORD #ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD #define MAX_BITS_PER_WORD BITS_PER_WORD
...@@ -1737,21 +1747,24 @@ enum alg_code { alg_add, alg_subtract, alg_compound }; ...@@ -1737,21 +1747,24 @@ enum alg_code { alg_add, alg_subtract, alg_compound };
struct algorithm struct algorithm
{ {
int cost; short cost;
unsigned int ops; short ops;
/* The size of the OP and COEFF fields are not directly related to the
word size, but that is the worst-case algorithm for any machine for
the multiplicand 10101010101... */
enum alg_code op[MAX_BITS_PER_WORD]; enum alg_code op[MAX_BITS_PER_WORD];
unsigned HOST_WIDE_INT coeff[MAX_BITS_PER_WORD]; char coeff[MAX_BITS_PER_WORD];
}; };
/* Compute and return the best algorithm for multiplying by T. /* Compute and return the best algorithm for multiplying by T.
Assume that add insns cost ADD_COST and shifts cost SHIFT_COST. The algorithm must cost less than cost_limit
Return cost -1 if would cost more than MAX_COST. */ If retval.cost >= COST_LIMIT, no algorithm was found and all
other field of the returned struct are undefined. */
static struct algorithm static struct algorithm
synth_mult (t, add_cost, shift_cost, max_cost) synth_mult (t, cost_limit)
unsigned HOST_WIDE_INT t; unsigned HOST_WIDE_INT t;
int add_cost, shift_cost; int cost_limit;
int max_cost;
{ {
int m, n; int m, n;
struct algorithm *best_alg struct algorithm *best_alg
...@@ -1760,160 +1773,146 @@ synth_mult (t, add_cost, shift_cost, max_cost) ...@@ -1760,160 +1773,146 @@ synth_mult (t, add_cost, shift_cost, max_cost)
= (struct algorithm *)alloca (sizeof (struct algorithm)); = (struct algorithm *)alloca (sizeof (struct algorithm));
unsigned int cost; unsigned int cost;
/* No matter what happens, we want to return a valid algorithm. */ /* Indicate that no algorithm is yet found. If no algorithm
best_alg->cost = max_cost; is found, this value will be returned and indicate failure. */
best_alg->cost = cost_limit;
best_alg->ops = 0; best_alg->ops = 0;
/* Is t an exponent of 2, so we can just do a shift? */ if (cost_limit < 0)
return *best_alg;
if ((t & -t) == t) /* Is t an exponent of 2, so we can just do a shift? */
if ((t & (t - 1)) == 0)
{ {
if (t > 1) if (t > 1)
{ {
if (max_cost >= shift_cost) m = exact_log2 (t);
if (shift_cost[m] < cost_limit)
{ {
best_alg->cost = shift_cost; best_alg->cost = shift_cost[m];
best_alg->ops = 1; best_alg->ops = 1;
best_alg->op[0] = alg_add; best_alg->op[0] = alg_add_t_m2;
best_alg->coeff[0] = t; best_alg->coeff[0] = m;
} }
else
best_alg->cost = -1;
}
else if (t == 1)
{
if (max_cost >= 0)
best_alg->cost = 0;
} }
else else
/* t == 0 or t == 1 takes no instructions. */
best_alg->cost = 0; best_alg->cost = 0;
return *best_alg; return *best_alg;
} }
/* If MAX_COST just permits as little as an addition (or less), we won't
succeed in synthesizing an algorithm for t. Return immediately with
an indication of failure. */
if (max_cost <= add_cost)
{
best_alg->cost = -1;
return *best_alg;
}
/* Look for factors of t of the form /* Look for factors of t of the form
t = q(2**m +- 1), 2 <= m <= floor(log2(t)) - 1. t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
If we find such a factor, we can multiply by t using an algorithm that If we find such a factor, we can multiply by t using an algorithm that
multiplies by q, shift the result by m and add/subtract it to itself. */ multiplies by q, shift the result by m and add/subtract it to itself.
We search for large factors first and loop down, even if large factors
are less probable than small; if we find a large factor we will find a
good sequence quickly, and therefore be able to prune (by decreasing
COST_LIMIT) the search. */
for (m = floor_log2 (t) - 1; m >= 2; m--) for (m = floor_log2 (t - 1); m >= 2; m--)
{ {
HOST_WIDE_INT m_exp_2 = (HOST_WIDE_INT) 1 << m; unsigned HOST_WIDE_INT d;
HOST_WIDE_INT d;
d = m_exp_2 + 1; d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
if (t % d == 0) if (t % d == 0 && t > d)
{ {
HOST_WIDE_INT q = t / d; unsigned HOST_WIDE_INT q = t / d;
cost = add_cost + shift_cost * 2; if (shiftadd_cost[m] <= add_cost + shift_cost[m])
cost = shiftadd_cost[m];
else
cost = add_cost + shift_cost[m];
*alg_in = synth_mult (q, add_cost, shift_cost, *alg_in = synth_mult (q, cost_limit - cost);
MIN (max_cost, best_alg->cost) - cost);
if (alg_in->cost >= 0)
{
cost += alg_in->cost; cost += alg_in->cost;
if (cost < best_alg->cost) if (cost < best_alg->cost)
{ {
struct algorithm *x; struct algorithm *x;
x = alg_in; x = alg_in, alg_in = best_alg, best_alg = x;
alg_in = best_alg; best_alg->coeff[best_alg->ops] = m;
best_alg = x; best_alg->op[best_alg->ops++] = alg_add_factor;
best_alg->coeff[best_alg->ops] = m_exp_2; best_alg->cost = cost_limit = cost;
best_alg->op[best_alg->ops++] = alg_compound;
best_alg->coeff[best_alg->ops] = 1;
best_alg->op[best_alg->ops++] = alg_add;
best_alg->cost = cost;
}
} }
} }
d = m_exp_2 - 1; d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
if (t % d == 0) if (t % d == 0 && t > d)
{ {
HOST_WIDE_INT q = t / d; unsigned HOST_WIDE_INT q = t / d;
cost = add_cost + shift_cost * 2; if (shiftsub_cost[m] <= add_cost + shift_cost[m])
cost = shiftsub_cost[m];
else
cost = add_cost + shift_cost[m];
*alg_in = synth_mult (q, add_cost, shift_cost, *alg_in = synth_mult (q, cost_limit - cost);
MIN (max_cost, best_alg->cost) - cost);
if (alg_in->cost >= 0)
{
cost += alg_in->cost; cost += alg_in->cost;
if (cost < best_alg->cost) if (cost < best_alg->cost)
{ {
struct algorithm *x; struct algorithm *x;
x = alg_in; x = alg_in, alg_in = best_alg, best_alg = x;
alg_in = best_alg; best_alg->coeff[best_alg->ops] = m;
best_alg = x; best_alg->op[best_alg->ops++] = alg_sub_factor;
best_alg->coeff[best_alg->ops] = m_exp_2; best_alg->cost = cost_limit = cost;
best_alg->op[best_alg->ops++] = alg_compound;
best_alg->coeff[best_alg->ops] = 1;
best_alg->op[best_alg->ops++] = alg_subtract;
best_alg->cost = cost;
} }
} }
} }
}
/* Try load effective address instructions, i.e. do a*3, a*5, a*9. */
/* Try shift-and-add (load effective address) instructions,
i.e. do a*3, a*5, a*9. */
if ((t & 1) != 0)
{ {
HOST_WIDE_INT q; unsigned HOST_WIDE_INT q;
HOST_WIDE_INT w;
q = t & -t; /* get out lsb */
w = (t - q) & -(t - q); /* get out next lsb */
if (w / q <= lea_max_mul) q = t - 1;
q = q & -q;
m = exact_log2 (q);
cost = shiftadd_cost[m];
if (cost < cost_limit)
{ {
cost = lea_cost + (q != 1 ? shift_cost : 0); *alg_in = synth_mult ((t - 1) >> m, cost_limit - cost);
*alg_in = synth_mult (t - q - w, add_cost, shift_cost, cost += alg_in->cost;
MIN (max_cost, best_alg->cost) - cost); if (cost < best_alg->cost)
{
struct algorithm *x;
x = alg_in, alg_in = best_alg, best_alg = x;
best_alg->coeff[best_alg->ops] = m;
best_alg->op[best_alg->ops++] = alg_add_t2_m;
best_alg->cost = cost_limit = cost;
}
}
if (alg_in->cost >= 0) q = t + 1;
q = q & -q;
m = exact_log2 (q);
cost = shiftsub_cost[m];
if (cost < cost_limit)
{ {
cost += alg_in->cost; *alg_in = synth_mult ((t + 1) >> m, cost_limit - cost);
/* Use <= to prefer this method to the factoring method cost += alg_in->cost;
when the cost appears the same, because this method if (cost < best_alg->cost)
uses fewer temporary registers. */
if (cost <= best_alg->cost)
{ {
struct algorithm *x; struct algorithm *x;
x = alg_in; x = alg_in, alg_in = best_alg, best_alg = x;
alg_in = best_alg; best_alg->coeff[best_alg->ops] = m;
best_alg = x; best_alg->op[best_alg->ops++] = alg_sub_t2_m;
best_alg->coeff[best_alg->ops] = w; best_alg->cost = cost_limit = cost;
best_alg->op[best_alg->ops++] = alg_add;
best_alg->coeff[best_alg->ops] = q;
best_alg->op[best_alg->ops++] = alg_add;
best_alg->cost = cost;
}
} }
} }
} }
/* Now, use the good old method to add or subtract at the leftmost /* Now, use the simple method of adding or subtracting at the leftmost
1-bit. */ 1-bit. */
{ {
HOST_WIDE_INT q; unsigned HOST_WIDE_INT q;
HOST_WIDE_INT w; unsigned HOST_WIDE_INT w;
q = t & -t; /* get out lsb */ q = t & -t; /* get out lsb */
for (w = q; (w & t) != 0; w <<= 1) for (w = q; (w & t) != 0; w <<= 1)
...@@ -1925,63 +1924,46 @@ synth_mult (t, add_cost, shift_cost, max_cost) ...@@ -1925,63 +1924,46 @@ synth_mult (t, add_cost, shift_cost, max_cost)
{ {
/* There are many bits in a row. Make 'em by subtraction. */ /* There are many bits in a row. Make 'em by subtraction. */
m = exact_log2 (q);
cost = add_cost; cost = add_cost;
if (q != 1) if (q != 1)
cost += shift_cost; cost += shift_cost[m];
*alg_in = synth_mult (t + q, add_cost, shift_cost, *alg_in = synth_mult (t + q, cost_limit - cost);
MIN (max_cost, best_alg->cost) - cost);
if (alg_in->cost >= 0)
{
cost += alg_in->cost; cost += alg_in->cost;
if (cost < best_alg->cost)
/* Use <= to prefer this method to the factoring method
when the cost appears the same, because this method
uses fewer temporary registers. */
if (cost <= best_alg->cost)
{ {
struct algorithm *x; struct algorithm *x;
x = alg_in; x = alg_in, alg_in = best_alg, best_alg = x;
alg_in = best_alg; best_alg->coeff[best_alg->ops] = m;
best_alg = x; best_alg->op[best_alg->ops++] = alg_sub_t_m2;
best_alg->coeff[best_alg->ops] = q; best_alg->cost = cost_limit = cost;
best_alg->op[best_alg->ops++] = alg_subtract;
best_alg->cost = cost;
}
} }
} }
else else
{ {
/* There's only one bit at the left. Make it by addition. */ /* There's only one or two bit at the left. Make it by addition. */
m = exact_log2 (q);
cost = add_cost; cost = add_cost;
if (q != 1) if (q != 1)
cost += shift_cost; cost += shift_cost[m];
*alg_in = synth_mult (t - q, add_cost, shift_cost, *alg_in = synth_mult (t - q, cost_limit - cost);
MIN (max_cost, best_alg->cost) - cost);
if (alg_in->cost >= 0)
{
cost += alg_in->cost; cost += alg_in->cost;
if (cost < best_alg->cost)
if (cost <= best_alg->cost)
{ {
struct algorithm *x; struct algorithm *x;
x = alg_in; x = alg_in, alg_in = best_alg, best_alg = x;
alg_in = best_alg; best_alg->coeff[best_alg->ops] = m;
best_alg = x; best_alg->op[best_alg->ops++] = alg_add_t_m2;
best_alg->coeff[best_alg->ops] = q; best_alg->cost = cost_limit = cost;
best_alg->op[best_alg->ops++] = alg_add;
best_alg->cost = cost;
}
} }
} }
} }
if (best_alg->cost >= max_cost)
best_alg->cost = -1;
return *best_alg; return *best_alg;
} }
...@@ -2021,8 +2003,7 @@ expand_mult (mode, op0, op1, target, unsignedp) ...@@ -2021,8 +2003,7 @@ expand_mult (mode, op0, op1, target, unsignedp)
struct algorithm alg; struct algorithm alg;
struct algorithm neg_alg; struct algorithm neg_alg;
int negate = 0; int negate = 0;
HOST_WIDE_INT absval = INTVAL (op1); HOST_WIDE_INT val = INTVAL (op1);
rtx last;
/* Try to do the computation two ways: multiply by the negative of OP1 /* Try to do the computation two ways: multiply by the negative of OP1
and then negate, or do the multiplication directly. The latter is and then negate, or do the multiplication directly. The latter is
...@@ -2031,21 +2012,20 @@ expand_mult (mode, op0, op1, target, unsignedp) ...@@ -2031,21 +2012,20 @@ expand_mult (mode, op0, op1, target, unsignedp)
has a factor of 2**m +/- 1, while the negated value does not or has a factor of 2**m +/- 1, while the negated value does not or
vice versa. */ vice versa. */
alg = synth_mult (absval, add_cost, shift_cost, mult_cost); alg = synth_mult (val, mult_cost);
neg_alg = synth_mult (- absval, add_cost, shift_cost, neg_alg = synth_mult (- val,
(alg.cost >= 0 ? alg.cost : mult_cost) (alg.cost >= 0 ? alg.cost : mult_cost)
- negate_cost); - negate_cost);
if (neg_alg.cost >= 0 && neg_alg.cost + negate_cost < alg.cost) if (neg_alg.cost + negate_cost < alg.cost)
alg = neg_alg, negate = 1, absval = - absval; alg = neg_alg, negate = 1, val = - val;
if (alg.cost >= 0) if (alg.cost < mult_cost)
{ {
/* If we found something, it must be cheaper than multiply. /* If we found something, it must be cheaper than multiply.
So use it. */ So use it. */
int opno = 0; int opno;
rtx accum, tem; rtx accum, tem;
int factors_seen = 0;
op0 = protect_from_queue (op0, 0); op0 = protect_from_queue (op0, 0);
...@@ -2058,84 +2038,77 @@ expand_mult (mode, op0, op1, target, unsignedp) ...@@ -2058,84 +2038,77 @@ expand_mult (mode, op0, op1, target, unsignedp)
accum = copy_to_mode_reg (mode, op0); accum = copy_to_mode_reg (mode, op0);
else else
{ {
/* 1 if this is the last in a series of adds and subtracts. */ int log = alg.coeff[0];
int last = (1 == alg.ops || alg.op[1] == alg_compound); enum alg_code op = alg.op[0];
int log = floor_log2 (alg.coeff[0]); if (op == alg_add_t_m2)
if (! factors_seen && ! last) {
log -= floor_log2 (alg.coeff[1]);
if (alg.op[0] != alg_add)
abort ();
accum = expand_shift (LSHIFT_EXPR, mode, op0, accum = expand_shift (LSHIFT_EXPR, mode, op0,
build_int_2 (log, 0), NULL_RTX, 0); build_int_2 (log, 0), NULL_RTX, 0);
} }
else if (op == alg_add_t2_m)
while (++opno < alg.ops)
{
int log = floor_log2 (alg.coeff[opno]);
/* 1 if this is the last in a series of adds and subtracts. */
int last = (opno + 1 == alg.ops
|| alg.op[opno + 1] == alg_compound);
/* If we have not yet seen any separate factors (alg_compound)
then turn op0<<a1 + op0<<a2 + op0<<a3... into
(op0<<(a1-a2) + op0)<<(a2-a3) + op0... */
switch (alg.op[opno])
{ {
case alg_add: accum = expand_shift (LSHIFT_EXPR, mode, op0,
if (factors_seen) build_int_2 (log, 0), NULL_RTX, 0);
accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
accum);
}
else if (op == alg_sub_t2_m)
{ {
tem = expand_shift (LSHIFT_EXPR, mode, op0, accum = expand_shift (LSHIFT_EXPR, mode, op0,
build_int_2 (log, 0), NULL_RTX, 0); build_int_2 (log, 0), NULL_RTX, 0);
accum = force_operand (gen_rtx (PLUS, mode, accum, tem), accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
accum); accum);
} }
else else
abort ();
}
for (opno = 1; opno < alg.ops; opno++)
{ {
if (! last) int log = alg.coeff[opno];
log -= floor_log2 (alg.coeff[opno + 1]); switch (alg.op[opno])
accum = force_operand (gen_rtx (PLUS, mode, accum, op0), {
case alg_add_t_m2:
tem = expand_shift (LSHIFT_EXPR, mode, op0,
build_int_2 (log, 0), NULL_RTX, 0);
accum = force_operand (gen_rtx (PLUS, mode, accum, tem),
accum); accum);
accum = expand_shift (LSHIFT_EXPR, mode, accum,
build_int_2 (log, 0), accum, 0);
}
break; break;
case alg_subtract: case alg_sub_t_m2:
if (factors_seen)
{
tem = expand_shift (LSHIFT_EXPR, mode, op0, tem = expand_shift (LSHIFT_EXPR, mode, op0,
build_int_2 (log, 0), NULL_RTX, 0); build_int_2 (log, 0), NULL_RTX, 0);
accum = force_operand (gen_rtx (MINUS, mode, accum, tem), accum = force_operand (gen_rtx (MINUS, mode, accum, tem),
accum); accum);
}
else
{
if (! last)
log -= floor_log2 (alg.coeff[opno + 1]);
accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
accum);
accum = expand_shift (LSHIFT_EXPR, mode, accum,
build_int_2 (log, 0), accum, 0);
}
break; break;
case alg_compound: case alg_add_t2_m:
factors_seen = 1; accum = expand_shift (LSHIFT_EXPR, mode, accum,
tem = expand_shift (LSHIFT_EXPR, mode, accum,
build_int_2 (log, 0), NULL_RTX, 0); build_int_2 (log, 0), NULL_RTX, 0);
accum = force_operand (gen_rtx (PLUS, mode, accum, op0),
accum);
break;
log = floor_log2 (alg.coeff[opno + 1]); case alg_sub_t2_m:
accum = expand_shift (LSHIFT_EXPR, mode, accum, accum = expand_shift (LSHIFT_EXPR, mode, accum,
build_int_2 (log, 0), NULL_RTX, 0); build_int_2 (log, 0), NULL_RTX, 0);
opno++; accum = force_operand (gen_rtx (MINUS, mode, accum, op0),
if (alg.op[opno] == alg_add) accum);
break;
case alg_add_factor:
tem = expand_shift (LSHIFT_EXPR, mode, accum,
build_int_2 (log, 0), NULL_RTX, 0);
accum = force_operand (gen_rtx (PLUS, mode, tem, accum), accum = force_operand (gen_rtx (PLUS, mode, tem, accum),
tem); tem);
else break;
case alg_sub_factor:
tem = expand_shift (LSHIFT_EXPR, mode, accum,
build_int_2 (log, 0), NULL_RTX, 0);
accum = force_operand (gen_rtx (MINUS, mode, tem, accum), accum = force_operand (gen_rtx (MINUS, mode, tem, accum),
tem); tem);
break;
} }
} }
...@@ -2150,13 +2123,13 @@ expand_mult (mode, op0, op1, target, unsignedp) ...@@ -2150,13 +2123,13 @@ expand_mult (mode, op0, op1, target, unsignedp)
Torbjorn: Can you do this? */ Torbjorn: Can you do this? */
if (exact_log2 (absval) < 0) if (exact_log2 (val) < 0)
{ {
last = get_last_insn (); rtx last = get_last_insn ();
REG_NOTES (last) REG_NOTES (last)
= gen_rtx (EXPR_LIST, REG_EQUAL, = gen_rtx (EXPR_LIST, REG_EQUAL,
gen_rtx (MULT, mode, op0, gen_rtx (MULT, mode, op0,
negate ? GEN_INT (absval) : op1), negate ? GEN_INT (val) : op1),
REG_NOTES (last)); REG_NOTES (last));
} }
...@@ -2166,7 +2139,7 @@ expand_mult (mode, op0, op1, target, unsignedp) ...@@ -2166,7 +2139,7 @@ expand_mult (mode, op0, op1, target, unsignedp)
} }
/* This used to use umul_optab if unsigned, /* This used to use umul_optab if unsigned,
but I think that for non-widening multiply there is no difference but for non-widening multiply there is no difference
between signed and unsigned. */ between signed and unsigned. */
op0 = expand_binop (mode, smul_optab, op0 = expand_binop (mode, smul_optab,
op0, op1, target, unsignedp, OPTAB_LIB_WIDEN); op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment