Commit 26276705 by Roger Sayle (committed by Roger Sayle)

expmed.c (enum alg_code): Remove long unused enumeration values.


	* expmed.c (enum alg_code): Remove long unused enumeration values.
        (struct mult_cost): New structure to hold the "score" of a synthetic
	multiply sequence, including both a rtx_cost and a latency field.
	(MULT_COST_LESS): New macro to compare mult_cost to a constant.
	(CHEAPER_MULT_COST): New macro to compare two mult_costs.
	(struct algorithm): Change type of cost field to be mult_cost.
	(synth_mult): Change type of cost_limit argument to be a
	pointer to a mult_cost.  Update all cost comparisons to use the
	new mult_cost infrastructure.  For alg_add_factor and
	alg_sub_factor operations, latency is lower than the rtx_cost.
	(choose_mult_variant):  Update calls to synth_mult.  Perform
	cost comparisons using the new mult_cost infrastructure.
	(expand_mult_highpart): Use alg.cost.cost instead of alg.cost
	to obtain the total rtx_cost of a synth_mult "algorithm".

From-SVN: r86954
parent 594a3565
2004-09-01 Roger Sayle <roger@eyesopen.com>
* expmed.c (enum alg_code): Remove long unused enumeration values.
(struct mult_cost): New structure to hold the "score" of a synthetic
multiply sequence, including both a rtx_cost and a latency field.
(MULT_COST_LESS): New macro to compare mult_cost to a constant.
(CHEAPER_MULT_COST): New macro to compare two mult_costs.
(struct algorithm): Change type of cost field to be mult_cost.
(synth_mult): Change type of cost_limit argument to be a
pointer to a mult_cost. Update all cost comparisons to use the
new mult_cost infrastructure. For alg_add_factor and
alg_sub_factor operations, latency is lower than the rtx_cost.
(choose_mult_variant): Update calls to synth_mult. Perform
cost comparisons using the new mult_cost infrastructure.
(expand_mult_highpart): Use alg.cost.cost instead of alg.cost
to obtain the total rtx_cost of a synth_mult "algorithm".
2004-09-01 David Edelsohn <edelsohn@gnu.org>
* config/rs6000/power4.md: Increase store latency to 12.
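The "score" held in the new struct mult_cost combines two numbers: "cost", the sum of the rtx_costs of every operation in the synthetic multiply, and "latency", the length of the critical path through the expression tree. A minimal standalone sketch of that accounting, not taken from the patch and using made-up operation costs rather than GCC's rtx_cost tables:

#include <stdio.h>

/* Standalone illustration of the scoring described above; the struct
   mirrors the mult_cost added by the patch, but the cost numbers are
   hypothetical rather than taken from GCC's cost tables.  */
struct score { short cost; short latency; };

/* Combine two already-scored sub-sequences with one more operation
   whose rtx_cost is op_cost: total cost is the sum of all operations,
   latency is the longest path through the expression tree.  */
static struct score
combine (struct score a, struct score b, short op_cost)
{
  struct score r;
  r.cost = op_cost + a.cost + b.cost;
  r.latency = op_cost + (a.latency > b.latency ? a.latency : b.latency);
  return r;
}

int
main (void)
{
  struct score leaf = { 0, 0 };               /* cost(leaf) = latency(leaf) = 0 */
  struct score shl = combine (leaf, leaf, 2); /* e.g. a shift costing 2 */
  struct score sum = combine (shl, leaf, 4);  /* plus an add costing 4 */
  /* cost = 2 + 4 = 6, and latency is also 6 because the add depends on
     the shift; two independent shifts feeding one add would instead
     give cost 8 but latency only 6.  */
  printf ("cost=%d latency=%d\n", sum.cost, sum.latency);
  return 0;
}

With these recurrences, a sequence whose operations can execute in parallel has a latency smaller than its cost, which is exactly the tie-breaker the new comparison macros exploit.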
@@ -2153,8 +2153,37 @@ expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
 enum alg_code { alg_zero, alg_m, alg_shift,
                 alg_add_t_m2, alg_sub_t_m2,
                 alg_add_factor, alg_sub_factor,
-                alg_add_t2_m, alg_sub_t2_m,
-                alg_add, alg_subtract, alg_factor, alg_shiftop };
+                alg_add_t2_m, alg_sub_t2_m };
+
+/* This structure holds the "cost" of a multiply sequence.  The
+   "cost" field holds the total rtx_cost of every operator in the
+   synthetic multiplication sequence, hence cost(a op b) is defined
+   as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
+   The "latency" field holds the minimum possible latency of the
+   synthetic multiply, on a hypothetical infinitely parallel CPU.
+   This is the critical path, or the maximum height, of the expression
+   tree which is the sum of rtx_costs on the most expensive path from
+   any leaf to the root.  Hence latency(a op b) is defined as zero for
+   leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
+
+struct mult_cost {
+  short cost;     /* Total rtx_cost of the multiplication sequence.  */
+  short latency;  /* The latency of the multiplication sequence.  */
+};
+
+/* This macro is used to compare a pointer to a mult_cost against a
+   single integer "rtx_cost" value.  This is equivalent to the macro
+   CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
+#define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
+                             || ((X)->cost == (Y) && (X)->latency < (Y)))
+
+/* This macro is used to compare two pointers to mult_costs against
+   each other.  The macro returns true if X is cheaper than Y.
+   Currently, the cheaper of two mult_costs is the one with the
+   lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
+#define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost \
+                                 || ((X)->cost == (Y)->cost \
+                                     && (X)->latency < (Y)->latency))
 
 /* This structure records a sequence of operations.
    `ops' is the number of operations recorded.
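The two macros just added implement a single ordering: MULT_COST_LESS (X, Y) is the same comparison as CHEAPER_MULT_COST (X, Z) with Z = {Y, Y}, and ties on total cost are broken in favour of the lower latency. A small self-contained check, repeating the struct and macro definitions above so it compiles on its own:

#include <assert.h>

/* Local copies of the definitions added by the patch, for a
   standalone check.  */
struct mult_cost { short cost; short latency; };

#define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
                             || ((X)->cost == (Y) && (X)->latency < (Y)))
#define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
                                || ((X)->cost == (Y)->cost \
                                    && (X)->latency < (Y)->latency))

int
main (void)
{
  struct mult_cost a = { 6, 4 };   /* total rtx_cost 6, critical path 4 */
  struct mult_cost b = { 6, 6 };   /* same cost, longer critical path */

  assert (CHEAPER_MULT_COST (&a, &b));   /* tie on cost, a wins on latency */
  assert (!CHEAPER_MULT_COST (&b, &a));

  /* Comparing against a single rtx_cost limit Y is the same as
     comparing against the pair {Y, Y}.  */
  struct mult_cost z = { 6, 6 };
  assert (MULT_COST_LESS (&a, 6) == CHEAPER_MULT_COST (&a, &z));
  return 0;
}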
@@ -2177,7 +2206,7 @@ enum alg_code { alg_zero, alg_m, alg_shift,
 struct algorithm
 {
-  short cost;
+  struct mult_cost cost;
   short ops;
   /* The size of the OP and LOG fields are not directly related to the
      word size, but the worst-case algorithms will be if we have few
@@ -2195,7 +2224,7 @@ struct algorithm
 enum mult_variant {basic_variant, negate_variant, add_variant};
 
 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
-                        int, enum machine_mode mode);
+                        const struct mult_cost *, enum machine_mode mode);
 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
                                  struct algorithm *, enum mult_variant *, int);
 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
@@ -2215,19 +2244,22 @@ static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
 
 static void
 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
-            int cost_limit, enum machine_mode mode)
+            const struct mult_cost *cost_limit, enum machine_mode mode)
 {
   int m;
   struct algorithm *alg_in, *best_alg;
-  int cost;
+  struct mult_cost best_cost;
+  struct mult_cost new_limit;
+  int op_cost, op_latency;
   unsigned HOST_WIDE_INT q;
   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 
   /* Indicate that no algorithm is yet found.  If no algorithm
      is found, this value will be returned and indicate failure.  */
-  alg_out->cost = cost_limit;
+  alg_out->cost.cost = cost_limit->cost + 1;
 
-  if (cost_limit <= 0)
+  if (cost_limit->cost < 0
+      || (cost_limit->cost == 0 && cost_limit->latency <= 0))
     return;
 
   /* Restrict the bits of "t" to the multiplication's mode.  */
@@ -2237,7 +2269,8 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
   if (t == 1)
     {
       alg_out->ops = 1;
-      alg_out->cost = 0;
+      alg_out->cost.cost = 0;
+      alg_out->cost.latency = 0;
       alg_out->op[0] = alg_m;
       return;
     }
@@ -2246,12 +2279,13 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
      fail now.  */
   if (t == 0)
     {
-      if (zero_cost >= cost_limit)
+      if (MULT_COST_LESS (cost_limit, zero_cost))
        return;
       else
        {
          alg_out->ops = 1;
-         alg_out->cost = zero_cost;
+         alg_out->cost.cost = zero_cost;
+         alg_out->cost.latency = zero_cost;
          alg_out->op[0] = alg_zero;
          return;
        }
@@ -2261,6 +2295,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
 
   alg_in = alloca (sizeof (struct algorithm));
   best_alg = alloca (sizeof (struct algorithm));
+  best_cost = *cost_limit;
 
   /* If we have a group of zero bits at the low-order part of T, try
      multiplying by the remaining bits and then doing a shift.  */
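Every case in the body of synth_mult below follows the same branch-and-bound pattern: charge the cost of the operation being tried, tighten the limit passed to the recursive call by that amount, and accept the result only if it beats best_cost, which then becomes the new bound. A toy, self-contained illustration of that pruning pattern, with made-up shift and add costs instead of GCC's per-mode cost tables and only the scalar cost (no latency) modelled:

#include <stdio.h>

/* Toy version of the search above: build T from 1 using "double"
   (a shift) and "add 1" steps, pruning with a cost limit the way
   synth_mult prunes with best_cost.  Step costs are hypothetical.  */
#define SHIFT_COST 1
#define ADD_COST   2

static int
toy_synth (unsigned long long t, int cost_limit)
{
  if (cost_limit <= 0)
    return cost_limit + 1;      /* "no algorithm found", like alg_out->cost */
  if (t == 1)
    return 0;                   /* a leaf costs nothing */

  int best = cost_limit;        /* plays the role of best_cost */

  if ((t & 1) == 0)
    {
      /* Try t = 2 * (t / 2): tighten the limit by the shift's cost.  */
      int sub = toy_synth (t >> 1, best - SHIFT_COST);
      if (SHIFT_COST + sub < best)
        best = SHIFT_COST + sub;
    }
  /* Try t = (t - 1) + 1: tighten the limit by the add's cost.  */
  int sub = toy_synth (t - 1, best - ADD_COST);
  if (ADD_COST + sub < best)
    best = ADD_COST + sub;

  return best;                  /* == cost_limit means the search failed */
}

int
main (void)
{
  printf ("cheapest sequence for 10: %d\n", toy_synth (10, 100));
  return 0;
}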
@@ -2274,19 +2309,22 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
          /* The function expand_shift will choose between a shift and
             a sequence of additions, so the observed cost is given as
             MIN (m * add_cost[mode], shift_cost[mode][m]).  */
-         cost = m * add_cost[mode];
-         if (shift_cost[mode][m] < cost)
-           cost = shift_cost[mode][m];
-         synth_mult (alg_in, q, cost_limit - cost, mode);
-
-         cost += alg_in->cost;
-         if (cost < cost_limit)
+         op_cost = m * add_cost[mode];
+         if (shift_cost[mode][m] < op_cost)
+           op_cost = shift_cost[mode][m];
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_cost;
+         synth_mult (alg_in, q, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
+             best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_shift;
-             cost_limit = cost;
            }
        }
     }
@@ -2311,34 +2349,40 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
        {
          /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
-         cost = add_cost[mode];
-         synth_mult (alg_in, t + 1, cost_limit - cost, mode);
-
-         cost += alg_in->cost;
-         if (cost < cost_limit)
+         op_cost = add_cost[mode];
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_cost;
+         synth_mult (alg_in, t + 1, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
+             best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_sub_t_m2;
-             cost_limit = cost;
            }
        }
       else
        {
          /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
-         cost = add_cost[mode];
-         synth_mult (alg_in, t - 1, cost_limit - cost, mode);
-
-         cost += alg_in->cost;
-         if (cost < cost_limit)
+         op_cost = add_cost[mode];
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_cost;
+         synth_mult (alg_in, t - 1, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
+             best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = 0;
              best_alg->op[best_alg->ops] = alg_add_t_m2;
-             cost_limit = cost;
            }
        }
     }
@@ -2360,19 +2404,36 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
       if (t % d == 0 && t > d && m < maxm)
        {
-         cost = add_cost[mode] + shift_cost[mode][m];
-         if (shiftadd_cost[mode][m] < cost)
-           cost = shiftadd_cost[mode][m];
-         synth_mult (alg_in, t / d, cost_limit - cost, mode);
-
-         cost += alg_in->cost;
-         if (cost < cost_limit)
+         /* If the target has a cheap shift-and-add instruction use
+            that in preference to a shift insn followed by an add insn.
+            Assume that the shift-and-add is "atomic" with a latency
+            equal to its cost, otherwise assume that on superscalar
+            hardware the shift may be executed concurrently with the
+            earlier steps in the algorithm.  */
+         op_cost = add_cost[mode] + shift_cost[mode][m];
+         if (shiftadd_cost[mode][m] < op_cost)
+           {
+             op_cost = shiftadd_cost[mode][m];
+             op_latency = op_cost;
+           }
+         else
+           op_latency = add_cost[mode];
+
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_latency;
+         synth_mult (alg_in, t / d, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_latency;
+         if (alg_in->cost.latency < op_cost)
+           alg_in->cost.latency = op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
+             best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_factor;
-             cost_limit = cost;
            }
          /* Other factors will have been taken care of in the recursion.  */
          break;
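In the two factor cases, when separate shift and add/sub instructions are used, the full op_cost (shift plus add) is charged to the total cost but only the add's cost to the latency, on the stated assumption that the shift can overlap earlier steps; the final clamp then keeps the sequence's latency from dropping below the cost of this last step. A small numeric sketch with hypothetical cost-table values, not GCC's per-mode tables:

#include <stdio.h>

/* Worked example of the add_factor accounting above, with hypothetical
   values: add = 4, shift = 2, shift-and-add = 8.  */
int
main (void)
{
  int add_cost = 4, shift_cost = 2, shiftadd_cost = 8;
  int sub_cost = 5, sub_latency = 3;  /* pretend result of the recursive call */

  int op_cost = add_cost + shift_cost;          /* 6 */
  int op_latency;
  if (shiftadd_cost < op_cost)
    op_latency = op_cost = shiftadd_cost;       /* "atomic" shift-and-add */
  else
    op_latency = add_cost;                      /* shift assumed to overlap: 4 */

  int total_cost = sub_cost + op_cost;          /* 5 + 6 = 11 */
  int total_latency = sub_latency + op_latency; /* 3 + 4 = 7 */
  if (total_latency < op_cost)                  /* never shorter than this step */
    total_latency = op_cost;

  printf ("cost=%d latency=%d\n", total_cost, total_latency);
  return 0;
}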
@@ -2381,19 +2442,36 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
       if (t % d == 0 && t > d && m < maxm)
        {
-         cost = add_cost[mode] + shift_cost[mode][m];
-         if (shiftsub_cost[mode][m] < cost)
-           cost = shiftsub_cost[mode][m];
-         synth_mult (alg_in, t / d, cost_limit - cost, mode);
-
-         cost += alg_in->cost;
-         if (cost < cost_limit)
+         /* If the target has a cheap shift-and-subtract insn use
+            that in preference to a shift insn followed by a sub insn.
+            Assume that the shift-and-sub is "atomic" with a latency
+            equal to its cost, otherwise assume that on superscalar
+            hardware the shift may be executed concurrently with the
+            earlier steps in the algorithm.  */
+         op_cost = add_cost[mode] + shift_cost[mode][m];
+         if (shiftsub_cost[mode][m] < op_cost)
+           {
+             op_cost = shiftsub_cost[mode][m];
+             op_latency = op_cost;
+           }
+         else
+           op_latency = add_cost[mode];
+
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_latency;
+         synth_mult (alg_in, t / d, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_latency;
+         if (alg_in->cost.latency < op_cost)
+           alg_in->cost.latency = op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
+             best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_factor;
-             cost_limit = cost;
            }
          break;
        }
@@ -2408,17 +2486,20 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       m = exact_log2 (q);
       if (m >= 0 && m < maxm)
        {
-         cost = shiftadd_cost[mode][m];
-         synth_mult (alg_in, (t - 1) >> m, cost_limit - cost, mode);
-
-         cost += alg_in->cost;
-         if (cost < cost_limit)
+         op_cost = shiftadd_cost[mode][m];
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_cost;
+         synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
+             best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_add_t2_m;
-             cost_limit = cost;
            }
        }
@@ -2427,36 +2508,38 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
       m = exact_log2 (q);
       if (m >= 0 && m < maxm)
        {
-         cost = shiftsub_cost[mode][m];
-         synth_mult (alg_in, (t + 1) >> m, cost_limit - cost, mode);
-
-         cost += alg_in->cost;
-         if (cost < cost_limit)
+         op_cost = shiftsub_cost[mode][m];
+         new_limit.cost = best_cost.cost - op_cost;
+         new_limit.latency = best_cost.latency - op_cost;
+         synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
+
+         alg_in->cost.cost += op_cost;
+         alg_in->cost.latency += op_cost;
+         if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
            {
              struct algorithm *x;
+             best_cost = alg_in->cost;
              x = alg_in, alg_in = best_alg, best_alg = x;
              best_alg->log[best_alg->ops] = m;
              best_alg->op[best_alg->ops] = alg_sub_t2_m;
-             cost_limit = cost;
            }
        }
     }
 
-  /* If cost_limit has not decreased since we stored it in alg_out->cost,
-     we have not found any algorithm.  */
-  if (cost_limit == alg_out->cost)
-    return;
-
   /* If we are getting a too long sequence for `struct algorithm'
      to record, make this search fail.  */
   if (best_alg->ops == MAX_BITS_PER_WORD)
     return;
 
+  /* If best_cost has not decreased, we have not found any algorithm.  */
+  if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
+    return;
+
   /* Copy the algorithm from temporary space to the space at alg_out.
      We avoid using structure assignment because the majority of
      best_alg is normally undefined, and this is a critical function.  */
   alg_out->ops = best_alg->ops + 1;
-  alg_out->cost = cost_limit;
+  alg_out->cost = best_cost;
 
   memcpy (alg_out->op, best_alg->op,
          alg_out->ops * sizeof *alg_out->op);
   memcpy (alg_out->log, best_alg->log,
@@ -2479,29 +2562,57 @@ choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
                     int mult_cost)
 {
   struct algorithm alg2;
+  struct mult_cost limit;
+  int op_cost;
 
   *variant = basic_variant;
-  synth_mult (alg, val, mult_cost, mode);
+  limit.cost = mult_cost;
+  limit.latency = mult_cost;
+  synth_mult (alg, val, &limit, mode);
 
   /* This works only if the inverted value actually fits in an
      `unsigned int' */
   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
     {
-      synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - neg_cost[mode],
-                  mode);
-      alg2.cost += neg_cost[mode];
-      if (alg2.cost < alg->cost)
+      op_cost = neg_cost[mode];
+      if (MULT_COST_LESS (&alg->cost, mult_cost))
+        {
+          limit.cost = alg->cost.cost - op_cost;
+          limit.latency = alg->cost.latency - op_cost;
+        }
+      else
+        {
+          limit.cost = mult_cost - op_cost;
+          limit.latency = mult_cost - op_cost;
+        }
+      synth_mult (&alg2, -val, &limit, mode);
+      alg2.cost.cost += op_cost;
+      alg2.cost.latency += op_cost;
+      if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
        *alg = alg2, *variant = negate_variant;
     }
 
   /* This proves very useful for division-by-constant.  */
-  synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost[mode],
-              mode);
-  alg2.cost += add_cost[mode];
-  if (alg2.cost < alg->cost)
+  op_cost = add_cost[mode];
+  if (MULT_COST_LESS (&alg->cost, mult_cost))
+    {
+      limit.cost = alg->cost.cost - op_cost;
+      limit.latency = alg->cost.latency - op_cost;
+    }
+  else
+    {
+      limit.cost = mult_cost - op_cost;
+      limit.latency = mult_cost - op_cost;
+    }
+  synth_mult (&alg2, val - 1, &limit, mode);
+  alg2.cost.cost += op_cost;
+  alg2.cost.latency += op_cost;
+  if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
     *alg = alg2, *variant = add_variant;
 
-  return alg->cost < mult_cost;
+  return MULT_COST_LESS (&alg->cost, mult_cost);
 }
 
 /* A subroutine of expand_mult, used for constant multiplications.
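choose_mult_variant above synthesizes val directly, then tries -val followed by a negation and val - 1 followed by an add, each under a limit tightened to the cheaper of the current best algorithm and the caller's mult_cost budget, minus the extra operation's cost. A simplified sketch of that selection over plain integer costs, with a hypothetical fake_synth standing in for synth_mult and the latency field ignored:

/* Simplified selection logic mirroring choose_mult_variant, using
   scalar costs only.  fake_synth is a hypothetical stand-in that
   returns a made-up cost, or a value no better than its limit on
   "failure".  */
enum variant { basic_variant, negate_variant, add_variant };

static int
fake_synth (long val, int limit)
{
  (void) val;                      /* hypothetical: every value costs 8 */
  return 8 < limit ? 8 : limit + 1;
}

static int
choose_variant (long val, int mult_cost, enum variant *variant)
{
  const int neg_cost = 4, add_cost = 4;   /* hypothetical operation costs */
  *variant = basic_variant;
  int best = fake_synth (val, mult_cost);

  /* Variant 1: synthesize -val, then negate the product.  */
  int limit = (best < mult_cost ? best : mult_cost) - neg_cost;
  int alt = fake_synth (-val, limit) + neg_cost;
  if (alt < best)
    best = alt, *variant = negate_variant;

  /* Variant 2: synthesize val - 1, then add the operand once.  */
  limit = (best < mult_cost ? best : mult_cost) - add_cost;
  alt = fake_synth (val - 1, limit) + add_cost;
  if (alt < best)
    best = alt, *variant = add_variant;

  return best < mult_cost;   /* nonzero if a synthetic multiply is worthwhile */
}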
@@ -3074,8 +3185,8 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
     {
       /* See whether the specialized multiplication optabs are
         cheaper than the shift/add version.  */
-      tem = expand_mult_highpart_optab (mode, op0, op1, target,
-                                        unsignedp, alg.cost + extra_cost);
+      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
+                                        alg.cost.cost + extra_cost);
       if (tem)
        return tem;